from pipeline example described by Pierre-Luc Germain, course sta426 UZH
# set local path
local.path <- getwd()
setwd(local.path)
# use source() function to source the functions we want to execute
source("functions/functions_qc.R")
source("functions/functions_analysis.R")
## data can be downloaded from : http://imlspenticton.uzh.ch/dump/files_for_levesque.tar
#pick your poison
#sces <- read_previous_data()
sces <- read_and_qc(6)
## [1] "###################################"
## [1] "Quality Control:"
## [1] "patient1_HS"
## [1] "Summary of discarded antibodies:"
## Mode FALSE TRUE
## logical 69647 667633
## [1] "Distribution of the number of detected antibodies across all cells (red line = threshold, below which cells would be discarded):"
## [1] "Summaries of FDR smaller than 0.001 of the output of the empty Droplets algorithm:"
## Mode FALSE TRUE NA's
## logical 357639 4241 375400
## Limited
## Sig FALSE TRUE
## FALSE 357639 0
## TRUE 2 4239
## [1] "Histogram showing the P-values of the output of the empty Droplets algorithm: "
## [1] "Summary of the cells identified as dead cells:"
## Mode FALSE TRUE
## logical 3643 598
## [1] "Plot of cells, which will be discarded because they are identified as dead cells:"
## [1] "Summary of all cells that will be discarded and for what reasons:"
## DataFrame with 1 row and 4 columns
## LibSize NExprs MitoProp Total
## <integer> <integer> <integer> <integer>
## 1 41 66 598 663
## [1] "Plots looking at some quality metrics calculated by perCellQCMetrics:"
## [1] "Check that we don't accidently get rid of small cell populations because of our cleaning of the data:"
## [1] "Summary of the doublets identified by the scDblFinder algorithm:"
##
## singlet doublet
## 3451 127
## [1] "Percentage of removed cells:"
## [1] 99.53193
## [1] "###################################"
## [1] "Quality Control:"
## [1] "patient1_SCC"
## [1] "Summary of discarded antibodies:"
## Mode FALSE TRUE
## logical 68998 668282
## [1] "Distribution of the number of detected antibodies across all cells (red line = threshold, below which cells would be discarded):"
## [1] "Summaries of FDR smaller than 0.001 of the output of the empty Droplets algorithm:"
## Mode FALSE TRUE NA's
## logical 265491 2816 468973
## Limited
## Sig FALSE TRUE
## FALSE 265491 0
## TRUE 0 2816
## [1] "Histogram showing the P-values of the output of the empty Droplets algorithm: "
## [1] "Summary of the cells identified as dead cells:"
## Mode FALSE TRUE
## logical 2405 411
## [1] "Plot of cells, which will be discarded because they are identified as dead cells:"
## [1] "Summary of all cells that will be discarded and for what reasons:"
## DataFrame with 1 row and 4 columns
## LibSize NExprs MitoProp Total
## <integer> <integer> <integer> <integer>
## 1 24 40 411 448
## [1] "Plots looking at some quality metrics calculated by perCellQCMetrics:"
## [1] "Check that we don't accidently get rid of small cell populations because of our cleaning of the data:"
## [1] "Summary of the doublets identified by the scDblFinder algorithm:"
##
## singlet doublet
## 2310 58
## [1] "Percentage of removed cells:"
## [1] 99.68669
## [1] "###################################"
## [1] "Quality Control:"
## [1] "patient2_HS"
## [1] "Summary of discarded antibodies:"
## Mode FALSE TRUE
## logical 70856 666424
## [1] "Distribution of the number of detected antibodies across all cells (red line = threshold, below which cells would be discarded):"
## [1] "Summaries of FDR smaller than 0.001 of the output of the empty Droplets algorithm:"
## Mode FALSE TRUE NA's
## logical 348153 9225 379902
## Limited
## Sig FALSE TRUE
## FALSE 348153 0
## TRUE 88 9137
## [1] "Histogram showing the P-values of the output of the empty Droplets algorithm: "
## [1] "Summary of the cells identified as dead cells:"
## Mode FALSE TRUE
## logical 8229 996
## [1] "Plot of cells, which will be discarded because they are identified as dead cells:"
## [1] "Summary of all cells that will be discarded and for what reasons:"
## DataFrame with 1 row and 4 columns
## LibSize NExprs MitoProp Total
## <integer> <integer> <integer> <integer>
## 1 11 21 996 1015
## [1] "Plots looking at some quality metrics calculated by perCellQCMetrics:"
## [1] "Check that we don't accidently get rid of small cell populations because of our cleaning of the data:"
## [1] "Summary of the doublets identified by the scDblFinder algorithm:"
##
## singlet doublet
## 7508 702
## [1] "Percentage of removed cells:"
## [1] 98.98166
## [1] "###################################"
## [1] "Quality Control:"
## [1] "patient2_AK"
## [1] "Summary of discarded antibodies:"
## Mode FALSE TRUE
## logical 68400 668880
## [1] "Distribution of the number of detected antibodies across all cells (red line = threshold, below which cells would be discarded):"
## [1] "Summaries of FDR smaller than 0.001 of the output of the empty Droplets algorithm:"
## Mode FALSE TRUE NA's
## logical 334067 5231 397982
## Limited
## Sig FALSE TRUE
## FALSE 334067 0
## TRUE 25 5206
## [1] "Histogram showing the P-values of the output of the empty Droplets algorithm: "
## [1] "Summary of the cells identified as dead cells:"
## Mode FALSE TRUE
## logical 4480 751
## [1] "Plot of cells, which will be discarded because they are identified as dead cells:"
## [1] "Summary of all cells that will be discarded and for what reasons:"
## DataFrame with 1 row and 4 columns
## LibSize NExprs MitoProp Total
## <integer> <integer> <integer> <integer>
## 1 128 127 751 824
## [1] "Plots looking at some quality metrics calculated by perCellQCMetrics:"
## [1] "Check that we don't accidently get rid of small cell populations because of our cleaning of the data:"
## [1] "Summary of the doublets identified by the scDblFinder algorithm:"
##
## singlet doublet
## 4198 209
## [1] "Percentage of removed cells:"
## [1] 99.43061
#sces <- read_10x()
sces <- lapply(sces, split_data)
# only select the gene expression data, drop the antibody capture
sce.patient1_HS <- sces[[1]]
sce.patient1_SCC <- sces[[2]]
sce.patient2_HS <- sces[[3]]
sce.patient2_AK <- sces[[4]]
# add name to sce for output graph
attr(sce.patient1_HS, "name") <- "Patient1 HS"
attr(sce.patient1_SCC, "name") <- "Patient1 SCC"
attr(sce.patient2_HS, "name") <- "Patient2 HS"
attr(sce.patient2_AK, "name") <- "Patient2 AK"
#convert rownames
# rownames(sce.patient1_HS) <- convert_rownames(sce.patient1_HS)
# rownames(sce.patient1_SCC) <- convert_rownames(sce.patient1_SCC)
# rownames(sce.patient2_HS) <- convert_rownames(sce.patient2_HS)
# rownames(sce.patient2_AK) <- convert_rownames(sce.patient2_AK)
#make sure every rowname has the format ENS_ID.Gene_Name
rownames(sce.patient1_HS) <- paste_rownames(sce.patient1_HS)
rownames(sce.patient1_SCC) <- paste_rownames(sce.patient1_SCC)
rownames(sce.patient2_HS) <- paste_rownames(sce.patient2_HS)
rownames(sce.patient2_AK) <- paste_rownames(sce.patient2_AK)
The markers found in the literature Kim, Chung, and Kim (2020) Solé-Boldo (2020) were used to separate the different cell types and their subtypes within them. A few markers didn’t appear at all in our data (WISP2 [Secretory-reticular fibroblast], SEPP1 [Macrophage], TYP1 [Melanocyte]) so the corresponding cell types were selected using their remaining markers present. The Pericyte and vSMC markers were removed because they clustered badly and polluted the Fibroblast population. Since those markers are known to correlate with specific cell types, their genes were added to the list of high variable genes used to produce a lower dimensional representation of the data, even when their expression didn’t vary enough to be selected as highly variable.
| Types | Subtypes | Markers |
|---|---|---|
| Keratinocyte | DSC3 DSP LGALS7 | |
| basal | KRT5 KRT14 COL17A1 | |
| suprabasal | KRT1 KRT10 | |
| differentiated | LOR SPINK5 | |
| ORS | KRT6B | |
| channel | GJB2 GJB6 ATP1B1 | |
| sebaceous gland | MGST1 FASN | |
| sweat gland | DCD AQP5 | |
| Fibroblast | DCN LUM MMP2 | |
| secretory reticular | SLPI CTHRC1 MFAP5 TSPAN8 | |
| proinflammatory | CCL19 APOE CXCL2 CXCL3 EFEMP1 | |
| secretory papillary | APCDD1 ID1 WIF1 COL18A1 PTGDS | |
| mesenchymal | ASPN POSTN GPC3 TNN SFRP1 | |
| Pericyte & vSMC | ACTA2 TAGLN | |
| pericyte | RGS5 | |
| vSMC | MYL9 TPM2 RERGL | |
| Endothelial cell | PECAM1 SELE CLDN5 VWF | |
| lymphatic | PROX1 LYVE1 | |
| Myeloid | HLA-DRA FCER1G TYROBP AIF1 | |
| dendritic | CD1C | |
| langerhans | CD207 | |
| macrophage | CD68 RNASE1 ITGAX | |
| Lymphocyte | CD3D CD3E CD52 IL7R | |
| Melanocyte | DCT MLANA PMEL | |
| Schwann cell | CDH19 NGFR | |
| Mitotic cell | UBE2C PCNA |
# run PCA
# using known genes
# genes not found in the data : WISP2" "SEPP1" "TYP1"
# genes linked to pericytevSMC : "ACTA2","TAGLN"
# were removed from the following list
known_markers <- c("DSC3","DSP","LGALS7","KRT5","KRT14","COL17A1","KRT1","KRT10","LOR", "SPINK5","KRT6B","GJB2","GJB6","ATP1B1","MGST1","FASN","DCD","AQP5","DCN","LUM","MMP2","SLPI","CTHRC1","MFAP5","TSPAN8","CCL19","APOE","CXCL2","CXCL3","EFEMP1","APCDD1","ID1","WIF1","COL18A1","PTGDS","ASPN","POSTN","GPC3","TNN","SFRP1","RGS5","MYL9","TPM2","RERGL","PECAM1","SELE","CLDN5","VWF","PROX1","LYVE1","HLA-DRA","FCER1G","TYROBP","AIF1","CD1C","CD207","CD68","RNASE1","ITGAX","CD3D","CD3E","CD52","IL7R","DCT","MLANA","PMEL","CDH19","NGFR","UBE2C","PCNA")
sce.patient1_HS <- sc_PCA(sce.patient1_HS, hvg.patient1_HS, known_markers)
## Adding IL7R NGFR to the gene set used for PCA in Patient1 HS
sce.patient1_SCC <- sc_PCA(sce.patient1_SCC, hvg.patient1_SCC, known_markers)
## Adding KRT10 APOE TNN AIF1 CD3E NGFR to the gene set used for PCA in Patient1 SCC
sce.patient2_HS <- sc_PCA(sce.patient2_HS, hvg.patient2_HS, known_markers)
## Adding COL17A1 ID1 AIF1 ITGAX CD3E to the gene set used for PCA in Patient2 HS
sce.patient2_AK <- sc_PCA(sce.patient2_AK, hvg.patient2_AK, known_markers)
## Adding CXCL2 GPC3 TNN FCER1G ITGAX CD3E to the gene set used for PCA in Patient2 AK
results <- sc_cluster(sce.patient1_HS)
sce.patient1_HS <- results[[1]]
g.patient1_HS <- results[[2]]
results <- sc_cluster(sce.patient1_SCC)
sce.patient1_SCC <- results[[1]]
g.patient1_SCC <- results[[2]]
results <- sc_cluster(sce.patient2_HS)
sce.patient2_HS <- results[[1]]
g.patient2_HS <- results[[2]]
results <- sc_cluster(sce.patient2_AK)
sce.patient2_AK <- results[[1]]
g.patient2_AK <- results[[2]]
# genes not found in our data : "DCDN" "WISP2" "SEPP1" "TYP1"
# were removed from the list below
genes <- list(
# classification: https://www.sciencedirect.com/science/article/pii/S0923181120301985?via%3Dihub#bib0050
# KERATINOCYTE
keratinocyte = c("DSC3","DSP","LGALS7"),
#keratinocyte_basal = c("KRT5","KRT14","COL17A1"),
#keratinocyte_suprabasal = c("KRT1","KRT10"),
#keratinocyte_differentiated = c("LOR", "SPINK5"),
#keratinocyte_ORS = c("KRT6B"),
#keratinocyte_channel = c("GJB2","GJB6","ATP1B1"),
#keratinocyte_sebaceous_gland = c("MGST1","FASN"),
#keratinocyte_sweat_gland = c("DCD","AQP5"),
# FIBROBLAST
fibroblast = c("DCN","LUM","MMP2"),
# fibroblast subclassification from: https://www.nature.com/articles/s42003-020-0922-4#
#fibroblast_secretory_reticular = c(SLPI","CTHRC1","MFAP5","TSPAN8"),
#fibroblast_proinflammatory = c("CCL19","APOE","CXCL2","CXCL3","EFEMP1"),
#fibroblast_secretory_papillary = c("APCDD1","ID1","WIF1","COL18A1","PTGDS"),
#fibroblast_mesenchymal = c("ASPN","POSTN","GPC3","TNN","SFRP1"),
# PERICYTE & vSMC
pericytevSMC = c("ACTA2","TAGLN"), #vSMC : vascular smooth muscel cell
#pericytevSMC_pericyte = c("RGS5"),
#pericytevSMC_vSMC = c("MYL9","TPM2","RERGL"),
# ENDOTHELIAL CELL
endothelial = c("PECAM1","SELE","CLDN5","VWF"),
#endothelial_lymphatic = c("PROX1","LYVE1"),
# MYELOID CELL
myeloid = c("HLA-DRA","FCER1G","TYROBP","AIF1"),
#myeloid_dendritic = c("CD1C"),
#myeloid_langerhans = c("CD207"),
#myeloid_macrophage = c("CD68","RNASE1","ITGAX"),
# LYMPHOCYTE
lymphocyte = c("CD3D","CD3E","CD52","IL7R"),
# MELANOCYTE
melanocyte = c("DCT","MLANA","PMEL"),
# SCHWANN CELL
schwann = c("CDH19","NGFR"),
# MITOTIC CELL
mitotic = c("UBE2C","PCNA")
)
# TODO: try to move the convert_rownames step higher in a block without output so we're less
# likely to run it multiple time (which would lead to weird output)
#rownames(sce.patient1_HS) <- convert_rownames(sce.patient1_HS, g.patient1_HS, genes)
results <- annotate_cells(sce.patient1_HS, g.patient1_HS, genes)
sce.patient1_HS <- results[[1]]
km.patient1_HS <- results[[2]]
#rownames(sce.patient1_SCC) <- convert_rownames(sce.patient1_SCC, g.patient1_SCC, genes)
results <- annotate_cells(sce.patient1_SCC, g.patient1_SCC, genes)
sce.patient1_SCC <- results[[1]]
km.patient1_SCC <- results[[2]]
#rownames(sce.patient2_HS) <- convert_rownames(sce.patient2_HS, g.patient2_HS, genes)
results <- annotate_cells(sce.patient2_HS, g.patient2_HS, genes)
sce.patient2_HS <- results[[1]]
km.patient2_HS <- results[[2]]
#rownames(sce.patient2_AK) <- convert_rownames(sce.patient2_AK, g.patient2_AK, genes)
results <- annotate_cells(sce.patient2_AK, g.patient2_AK, genes)
sce.patient2_AK <- results[[1]]
km.patient2_AK <- results[[2]]
results <- pseudobulk(sce.patient1_HS, km.patient1_HS)
sce.patient1_HS <- results[[1]]
pb.patient1_HS <- results[[2]]
mat.patient1_HS <- results[[3]]
cl2.patient1_HS <- results[[4]]
#Patient 1 SCC
results <- pseudobulk(sce.patient1_SCC, km.patient1_SCC)
sce.patient1_SCC <- results[[1]]
pb.patient1_SCC <- results[[2]]
mat.patient1_SCC <- results[[3]]
cl2.patient1_SCC <- results[[4]]
#Patient 2 HS
results <- pseudobulk(sce.patient2_HS, km.patient2_HS)
sce.patient2_HS <- results[[1]]
pb.patient2_HS <- results[[2]]
mat.patient2_HS <- results[[3]]
cl2.patient2_HS <- results[[4]]
#Patient 2 AK
results <- pseudobulk(sce.patient2_AK, km.patient2_AK)
sce.patient2_AK <- results[[1]]
pb.patient2_AK <- results[[2]]
mat.patient2_AK <- results[[3]]
cl2.patient2_AK <- results[[4]]
# create barplots of cell type proportions
df.celltypes.patient1 <- rbind(df_barplot_celltypes(sce.patient1_HS), df_barplot_celltypes(sce.patient1_SCC))
df.celltypes.patient1$Type <- c("HS", "HS", "HS", "HS", "HS", "HS", "HS", "HS", "SCC", "SCC", "SCC", "SCC", "SCC", "SCC", "SCC", "SCC")
df.celltypes.patient2 <- rbind(df_barplot_celltypes(sce.patient2_HS), df_barplot_celltypes(sce.patient2_AK))
df.celltypes.patient2$Type <- c("HS", "HS", "HS", "HS", "HS", "HS", "HS", "HS", "AK", "AK", "AK", "AK", "AK", "AK", "AK", "AK")
dynamic_barplot(df.celltypes.patient1, "Patient 1", T, "Proportion of cell types in")
dynamic_barplot(df.celltypes.patient2, "Patient 2", T, "Proportion of cell types in")
known_markers.kera <- c("KRT5","KRT14","COL17A1","KRT1","KRT10","LOR", "SPINK5","KRT6B","GJB2","GJB6","ATP1B1","MGST1","FASN","DCD","AQP5")
#annotate the subclusters
genes.kera <- list(
# classification: https://www.sciencedirect.com/science/article/pii/S0923181120301985?via%3Dihub#bib0050
# KERATINOCYTE
#keratinocyte = c("DSC3","DSP","LGALS7"),
basal = c("KRT5","KRT14","COL17A1"),
suprabasal = c("KRT1","KRT10"),
differentiated = c("LOR", "SPINK5"),
ORS = c("KRT6B"),
channel = c("GJB2","GJB6","ATP1B1"),
sebaceous_gland = c("MGST1","FASN"),
sweat_gland = c("DCD","AQP5")
)
# TODO : find a way to avoid giving both known_merkers.kera and genes.kera since both contain the
# same information in essence. Maybe will be fixed by the first todo
# yeah but then we are running an additional for loop/maybe we want different ones (hypothetically at least)
sce.patient1_HS.kera <- cluster_subtype(sce.patient1_HS, "keratinocyte", known_markers.kera, genes.kera)
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|==== | 6%
|
|====== | 9%
|
|======== | 11%
|
|========== | 14%
|
|============ | 17%
|
|============== | 20%
|
|================ | 23%
|
|================== | 26%
|
|==================== | 29%
|
|====================== | 31%
|
|======================== | 34%
|
|========================== | 37%
|
|============================ | 40%
|
|============================== | 43%
|
|================================ | 46%
|
|================================== | 49%
|
|==================================== | 51%
|
|====================================== | 54%
|
|======================================== | 57%
|
|========================================== | 60%
|
|============================================ | 63%
|
|============================================== | 66%
|
|================================================ | 69%
|
|================================================== | 71%
|
|==================================================== | 74%
|
|====================================================== | 77%
|
|======================================================== | 80%
|
|========================================================== | 83%
|
|============================================================ | 86%
|
|============================================================== | 89%
|
|================================================================ | 91%
|
|================================================================== | 94%
|
|==================================================================== | 97%
|
|======================================================================| 100%
## Adding KRT5 AQP5 to the gene set used for PCA in Patient1 HS
sce.patient1_SCC.kera <- cluster_subtype(sce.patient1_SCC, "keratinocyte", known_markers.kera, genes.kera)
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|==== | 6%
|
|======= | 9%
|
|========= | 12%
|
|=========== | 16%
|
|============= | 19%
|
|=============== | 22%
|
|================== | 25%
|
|==================== | 28%
|
|====================== | 31%
|
|======================== | 34%
|
|========================== | 38%
|
|============================ | 41%
|
|=============================== | 44%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================= | 56%
|
|========================================== | 59%
|
|============================================ | 62%
|
|============================================== | 66%
|
|================================================ | 69%
|
|================================================== | 72%
|
|==================================================== | 75%
|
|======================================================= | 78%
|
|========================================================= | 81%
|
|=========================================================== | 84%
|
|============================================================= | 88%
|
|=============================================================== | 91%
|
|================================================================== | 94%
|
|==================================================================== | 97%
|
|======================================================================| 100%
## Adding FASN to the gene set used for PCA in Patient1 SCC
sce.patient2_HS.kera <- cluster_subtype(sce.patient2_HS, "keratinocyte", known_markers.kera, genes.kera)
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|===== | 6%
|
|======= | 10%
|
|========= | 13%
|
|=========== | 16%
|
|============== | 19%
|
|================ | 23%
|
|================== | 26%
|
|==================== | 29%
|
|======================= | 32%
|
|========================= | 35%
|
|=========================== | 39%
|
|============================= | 42%
|
|================================ | 45%
|
|================================== | 48%
|
|==================================== | 52%
|
|====================================== | 55%
|
|========================================= | 58%
|
|=========================================== | 61%
|
|============================================= | 65%
|
|=============================================== | 68%
|
|================================================== | 71%
|
|==================================================== | 74%
|
|====================================================== | 77%
|
|======================================================== | 81%
|
|=========================================================== | 84%
|
|============================================================= | 87%
|
|=============================================================== | 90%
|
|================================================================= | 94%
|
|==================================================================== | 97%
|
|======================================================================| 100%
## Adding KRT5 FASN to the gene set used for PCA in Patient2 HS
sce.patient2_AK.kera <- cluster_subtype(sce.patient2_AK, "keratinocyte", known_markers.kera, genes.kera)
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|== | 3%
|
|==== | 6%
|
|======= | 9%
|
|========= | 12%
|
|=========== | 16%
|
|============= | 19%
|
|=============== | 22%
|
|================== | 25%
|
|==================== | 28%
|
|====================== | 31%
|
|======================== | 34%
|
|========================== | 38%
|
|============================ | 41%
|
|=============================== | 44%
|
|================================= | 47%
|
|=================================== | 50%
|
|===================================== | 53%
|
|======================================= | 56%
|
|========================================== | 59%
|
|============================================ | 62%
|
|============================================== | 66%
|
|================================================ | 69%
|
|================================================== | 72%
|
|==================================================== | 75%
|
|======================================================= | 78%
|
|========================================================= | 81%
|
|=========================================================== | 84%
|
|============================================================= | 88%
|
|=============================================================== | 91%
|
|================================================================== | 94%
|
|==================================================================== | 97%
|
|======================================================================| 100%
## Adding KRT5 to the gene set used for PCA in Patient2 AK
genes.dyn <- list(
# classification: https://www.sciencedirect.com/science/article/pii/S0092867420306723
keratinocyte_basal = "COL17A1",
keratinocyte_cycling = "MKI67",
keratinocyte_differentiating = "KRT1"
)
kera.dyn.patient1_HS <- dynamic_plot(sce.patient1_HS.kera, g.patient1_HS.kera, genes.dyn)
## class: SingleCellExperiment
## dim: 17092 1374
## metadata(2): Samples scDblFinder.stats
## assays(2): counts logcounts
## rownames(17092): ENSG00000241860.AL627309.5 ENSG00000237491.LINC01409
## ... ENSG00000278817.AC007325.4 ENSG00000277196.AC007325.2
## rowData names(4): ID Symbol Type scDblFinder.selected
## colnames(1374): AAACCTGAGGGCTCTC-1 AAACCTGCAATGGAAT-1 ...
## TTTGTCACAATCGGTT-1 TTTGTCAGTTCACCTC-1
## colData names(15): Sample Barcode ... cluster2 cluster3
## reducedDimNames(3): PCA TSNE UMAP
## altExpNames(1): Antibody Capture
kera.dyn.patient1_SCC <- dynamic_plot(sce.patient1_SCC.kera, g.patient1_SCC.kera, genes.dyn)
## class: SingleCellExperiment
## dim: 15583 482
## metadata(2): Samples scDblFinder.stats
## assays(2): counts logcounts
## rownames(15583): ENSG00000237491.LINC01409 ENSG00000228794.LINC01128
## ... ENSG00000277666.AC136352.3 ENSG00000273554.AC136616.1
## rowData names(4): ID Symbol Type scDblFinder.selected
## colnames(482): AAACGGGGTCCGTGAC-1 AAACGGGGTCGGCACT-1 ...
## TTTGCGCGTCGGCATC-1 TTTGGTTAGTCATGCT-1
## colData names(15): Sample Barcode ... cluster2 cluster3
## reducedDimNames(3): PCA TSNE UMAP
## altExpNames(1): Antibody Capture
kera.dyn.patient2_HS <- dynamic_plot(sce.patient2_HS.kera, g.patient2_HS.kera, genes.dyn)
## class: SingleCellExperiment
## dim: 15229 1593
## metadata(2): Samples scDblFinder.stats
## assays(2): counts logcounts
## rownames(15229): ENSG00000237491.LINC01409 ENSG00000228794.LINC01128
## ... ENSG00000271254.AC240274.1 ENSG00000277196.AC007325.2
## rowData names(4): ID Symbol Type scDblFinder.selected
## colnames(1593): AAACCTGAGACATAAC-1 AAACGGGAGGAGCGAG-1 ...
## TTTGTCAGTACGCACC-1 TTTGTCATCAGTCAGT-1
## colData names(15): Sample Barcode ... cluster2 cluster3
## reducedDimNames(3): PCA TSNE UMAP
## altExpNames(1): Antibody Capture
kera.dyn.patient2_AK <- dynamic_plot(sce.patient2_AK.kera, g.patient2_AK.kera, genes.dyn)
## class: SingleCellExperiment
## dim: 15578 450
## metadata(2): Samples scDblFinder.stats
## assays(2): counts logcounts
## rownames(15578): ENSG00000237491.LINC01409 ENSG00000228794.LINC01128
## ... ENSG00000273554.AC136616.1 ENSG00000277196.AC007325.2
## rowData names(4): ID Symbol Type scDblFinder.selected
## colnames(450): AAAGTAGCACCGCTAG-1 AAATGCCTCGGAATCT-1 ...
## TTTGGTTCATGAACCT-1 TTTGGTTGTGGTAACG-1
## colData names(15): Sample Barcode ... cluster2 cluster3
## reducedDimNames(3): PCA TSNE UMAP
## altExpNames(1): Antibody Capture
kera.dyn.patient1 <- rbind(kera.dyn.patient1_HS, kera.dyn.patient1_SCC)
kera.dyn.patient1$Type <- c("HS", "HS", "HS", "SCC", "SCC", "SCC")
kera.dyn.patient2 <- rbind(kera.dyn.patient2_HS, kera.dyn.patient2_AK)
kera.dyn.patient2$Type <- c("HS", "HS", "HS", "AK", "AK", "AK")
dynamic_barplot(kera.dyn.patient1, "Patient 1", F, "Proportion of Keratinocyte states in")
dynamic_barplot(kera.dyn.patient2, "Patient 2", F, "Proportion of Keratinocyte states in")
known_markers.fibro <- c("SLPI","CTHRC1","MFAP5","TSPAN8","CCL19","APOE","CXCL2","CXCL3","EFEMP1","APCDD1","ID1","WIF1","COL18A1","PTGDS","ASPN","POSTN","GPC3","TNN","SFRP1")
genes.fibro <- list(
#fibroblast = c("DCN","LUM","MMP2"),
#fibroblast subclassification from: https://www.nature.com/articles/s42003-020-0922-4#
fibroblast_secretory_reticular = c("SLPI","CTHRC1","MFAP5","TSPAN8"),
fibroblast_proinflammatory = c("CCL19","APOE","CXCL2","CXCL3","EFEMP1"),
fibroblast_secretory_papillary = c("APCDD1","ID1","WIF1","COL18A1","PTGDS"),
fibroblast_mesenchymal = c("ASPN","POSTN","GPC3","TNN","SFRP1")
)
sce.patient1_HS.fibro <- cluster_subtype(sce.patient1_HS, "fibroblast", known_markers.fibro, genes.fibro)
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|=== | 4%
|
|====== | 9%
|
|========= | 13%
|
|============ | 17%
|
|=============== | 22%
|
|================== | 26%
|
|===================== | 30%
|
|======================== | 35%
|
|=========================== | 39%
|
|============================== | 43%
|
|================================= | 48%
|
|===================================== | 52%
|
|======================================== | 57%
|
|=========================================== | 61%
|
|============================================== | 65%
|
|================================================= | 70%
|
|==================================================== | 74%
|
|======================================================= | 78%
|
|========================================================== | 83%
|
|============================================================= | 87%
|
|================================================================ | 91%
|
|=================================================================== | 96%
|
|======================================================================| 100%
## Adding WIF1 to the gene set used for PCA in Patient1 HS
sce.patient1_SCC.fibro <- cluster_subtype(sce.patient1_SCC, "fibroblast", known_markers.fibro, genes.fibro)
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|=== | 4%
|
|===== | 7%
|
|======== | 11%
|
|========== | 15%
|
|============= | 19%
|
|================ | 22%
|
|================== | 26%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================= | 41%
|
|=============================== | 44%
|
|================================== | 48%
|
|==================================== | 52%
|
|======================================= | 56%
|
|========================================= | 59%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|==================================================== | 74%
|
|====================================================== | 78%
|
|========================================================= | 81%
|
|============================================================ | 85%
|
|============================================================== | 89%
|
|================================================================= | 93%
|
|=================================================================== | 96%
|
|======================================================================| 100%
sce.patient2_HS.fibro <- cluster_subtype(sce.patient2_HS, "fibroblast", known_markers.fibro, genes.fibro)
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|=== | 4%
|
|====== | 8%
|
|========= | 12%
|
|============ | 17%
|
|=============== | 21%
|
|================== | 25%
|
|==================== | 29%
|
|======================= | 33%
|
|========================== | 38%
|
|============================= | 42%
|
|================================ | 46%
|
|=================================== | 50%
|
|====================================== | 54%
|
|========================================= | 58%
|
|============================================ | 62%
|
|=============================================== | 67%
|
|================================================== | 71%
|
|==================================================== | 75%
|
|======================================================= | 79%
|
|========================================================== | 83%
|
|============================================================= | 88%
|
|================================================================ | 92%
|
|=================================================================== | 96%
|
|======================================================================| 100%
sce.patient2_AK.fibro <- cluster_subtype(sce.patient2_AK, "fibroblast", known_markers.fibro, genes.fibro)
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|==== | 5%
|
|======= | 10%
|
|========== | 15%
|
|============== | 20%
|
|================== | 25%
|
|===================== | 30%
|
|======================== | 35%
|
|============================ | 40%
|
|================================ | 45%
|
|=================================== | 50%
|
|====================================== | 55%
|
|========================================== | 60%
|
|============================================== | 65%
|
|================================================= | 70%
|
|==================================================== | 75%
|
|======================================================== | 80%
|
|============================================================ | 85%
|
|=============================================================== | 90%
|
|================================================================== | 95%
|
|======================================================================| 100%
## Adding CCL19 to the gene set used for PCA in Patient2 AK
The following code was initially used to check if the genes linked to the markers described in the literature were all found in the data set
data_markers <- rowData(sce.patient1_HS)$Symbol
known_markers <- c("DSC3","DSP","LGALS7","KRT5","KRT14","COL17A1","KRT1","KRT10","LOR", "SPINK5","KRT6B","GJB2","GJB6","ATP1B1","MGST1","FASN","DCD","AQP5","DCN","LUM","MMP2","WISP2","SLPI","CTHRC1","MFAP5","TSPAN8","CCL19","APOE","CXCL2","CXCL3","EFEMP1","APCDD1","ID1","WIF1","COL18A1","PTGDS","ASPN","POSTN","GPC3","TNN","SFRP1","ACTA2","TAGLN","RGS5","MYL9","TPM2","RERGL","PECAM1","SELE","CLDN5","VWF","PROX1","LYVE1","HLA-DRA","FCER1G","TYROBP","AIF1","CD1C","CD207","CD68","RNASE1","SEPP1","ITGAX","CD3D","CD3E","CD52","IL7R","DCT","MLANA","PMEL","TYP1","CDH19","NGFR","UBE2C","PCNA")
idx_notfound <- which(!(known_markers %in% rowData(sce.patient1_HS)$Symbol))
print(paste("Done:", length(known_markers)-length(idx_notfound), "/", length(known_markers), "markers found"))
cat("Genes missing in dataset: ", known_markers[idx_notfound])
Kim, Doyoung, Kyung Bae Chung, and Tae-Gyun Kim. 2020. “Application of Single-Cell Rna Sequencing on Human Skin: Technical Evolution and Challenges.” Journal of Dermatological Science 99 (2): 74–81. https://doi.org/https://doi.org/10.1016/j.jdermsci.2020.06.002.
Solé-Boldo, Raddatz, L. 2020. “Single-Cell Transcriptomes of the Human Skin Reveal Age-Related Loss of Fibroblast Priming.” Communications Biology 3 (188). https://doi.org/https://doi.org/10.1038/s42003-020-0922-4.